load("LDS_Public_Workspace.RData")
LDSestim=LDS
table(LDSestim$Sowing_Date_Schedule)
T5_16Dec T4_15Dec T3_30Nov T2_20Nov T1_10Nov
665 1696 3167 1704 416
In this notebook, I use a causal machine learning estimator, i.e., multi-armed causal random forest with augmented inverse propensity score weights (Athey et al 2019), to estimate conditional average treatment effects (CATES) for agronomic practices. These CATEs are estimated for each individual farm thereby providing personalized estimates of the potential effectiveness of the practices. I then use a debiased robust estimator in a policy tree optimization (Athey and Wager 2021) to generate optimal recommendations in the form of agronomic practices that maximize potential yield gains.
Purpose: To make individualized or personalized recommendations from observational data in a data-driven manner using causal machine learning frameworks.
Advantages
Disadvantages
It requires enough sample sizes for each of the options being compared. This mean that for new innovations which have not been extensively adopted, this approach would not be beneficial.
Stylized use case: While sowing dates and many other recommendations are made on the basis of climatic, biophysical and economic aspects, there may be several individual level reasons for not following with the recommendation, e.g., family members are busy with other duties during those weeks. We propose a robust methodology that rests on causal machine learning and policy learning to make recommendations that are the most beneficial for each individual farmer.
Input data requirements: The data required is the same as for any conventional production function or impact assessment. These include yield, agronomic management variables (e.g., fertilizer applied), socio-economic variables, and input and output prices. One however, needs enough sample sizes for the treatment and control groups therefore the method works only for a technology which has been widely adopted.
Workflow:
Toolkit workflow shows a step-by-step workflow for implementing the policy learning optimization model.
load("LDS_Public_Workspace.RData")
LDSestim=LDS
table(LDSestim$Sowing_Date_Schedule)
T5_16Dec T4_15Dec T3_30Nov T2_20Nov T1_10Nov
665 1696 3167 1704 416
# Bar graphs showing percentage of farmers adopting these practices
library(tidyverse)
library(ggplot2)
bar_chart=function(dat,var){
dat|>
drop_na({{var}})|>
mutate({{var}}:=factor({{var}})|>fct_infreq())|>
ggplot()+
geom_bar(aes(y={{var}}),width = 0.3,fill="dodgerblue4")+
theme_minimal(base_size = 16)
}
sow_plot=bar_chart(LDSestim,Sowing_Date_Schedule)+labs(y="Sowing dates")
sow_plotlibrary(ggpubr)
library(tidyverse)
#Sowing dates
SowingDate_Options_Errorplot=
LDSestim%>%
drop_na(Sowing_Date_Schedule) %>%
ggerrorplot(x = "Sowing_Date_Schedule", y = "L.tonPerHectare",add = "mean", error.plot = "errorbar", color="black",size=1, ggtheme=theme_bw())+
labs(x="Sowing date options",y="Wheat yield (t/ha)")+
theme_bw(base_size = 16)+coord_flip()
SowingDate_Options_Errorplot+aes(x = fct_reorder(Sowing_Date_Schedule, L.tonPerHectare))+
xlab("Sowing date options")library(fBasics)
library(fastDummies)
LDSestim_Desc=fastDummies::dummy_cols(LDSestim, select_columns=c("G.q5305_irrigTimes_cat","Sowing_Date_Schedule","A.q112_fEdu_new"))
library(fBasics)
summ_stats <- fBasics::basicStats(LDSestim_Desc[,c("L.tonPerHectare","G.q5305_irrigTimes",
"G.q5305_irrigTimes_cat_One","G.q5305_irrigTimes_cat_Two", "G.q5305_irrigTimes_cat_Three","G.q5305_irrigTimes_cat_Fourplus","Sowing_Date_Schedule_T5_16Dec", "Sowing_Date_Schedule_T4_15Dec","Sowing_Date_Schedule_T3_30Nov", "Sowing_Date_Schedule_T2_20Nov","Sowing_Date_Schedule_T1_10Nov", "A.q112_fEdu_new_noSchooling","A.q112_fEdu_new_primary","A.q112_fEdu_new_matriculation", "A.q112_fEdu_new_seniorSecondary", "A.q112_fEdu_new_bachelors","A.q112_fEdu_new_Postgrad","Nperha","P2O5perha","Weedmanaged","variety_type_NMWV","Sowing_Date_Early","I.q5505_weedSeverity_num","I.q5509_diseaseSeverity_num","I.q5506_insectSeverity_num","I.q5502_droughtSeverity_num", "temp","precip","wc2.1_30s_elev","A.q111_fGenderdum","nitrogen_0.5cm","sand_0.5cm", "soc_5.15cm")])
summ_stats <- as.data.frame(t(summ_stats))
# Rename some of the columns for convenience
summ_stats <- summ_stats[c("Mean", "Stdev", "Minimum", "1. Quartile", "Median", "3. Quartile", "Maximum")] %>%
rename("Lower quartile" = '1. Quartile', "Upper quartile"= "3. Quartile")
summ_stats Mean Stdev Minimum Lower quartile
L.tonPerHectare 2.990635 0.854990 0.200 2.4000
G.q5305_irrigTimes 2.286183 0.767177 1.000 2.0000
G.q5305_irrigTimes_cat_One 0.134628 0.341348 0.000 0.0000
G.q5305_irrigTimes_cat_Two 0.500197 0.500033 0.000 0.0000
G.q5305_irrigTimes_cat_Three 0.311376 0.463087 0.000 0.0000
G.q5305_irrigTimes_cat_Fourplus 0.053799 0.225635 0.000 0.0000
Sowing_Date_Schedule_T5_16Dec 0.086951 0.281781 0.000 0.0000
Sowing_Date_Schedule_T4_15Dec 0.221757 0.415456 0.000 0.0000
Sowing_Date_Schedule_T3_30Nov 0.414095 0.492597 0.000 0.0000
Sowing_Date_Schedule_T2_20Nov 0.222803 0.416155 0.000 0.0000
Sowing_Date_Schedule_T1_10Nov 0.054393 0.226807 0.000 0.0000
A.q112_fEdu_new_noSchooling 0.273666 0.445869 0.000 0.0000
A.q112_fEdu_new_primary 0.301517 0.458947 0.000 0.0000
A.q112_fEdu_new_matriculation 0.213258 0.409635 0.000 0.0000
A.q112_fEdu_new_seniorSecondary 0.106564 0.308578 0.000 0.0000
A.q112_fEdu_new_bachelors 0.086297 0.280821 0.000 0.0000
A.q112_fEdu_new_Postgrad 0.018698 0.135464 0.000 0.0000
Nperha 130.220556 37.038394 0.000 105.1852
P2O5perha 59.038865 19.629879 0.000 45.4321
Weedmanaged 0.758990 0.427724 0.000 1.0000
variety_type_NMWV 0.530097 0.499126 0.000 0.0000
Sowing_Date_Early 0.277197 0.447644 0.000 0.0000
I.q5505_weedSeverity_num 2.716396 0.755250 1.000 2.0000
I.q5509_diseaseSeverity_num 1.468619 0.703302 1.000 1.0000
I.q5506_insectSeverity_num 1.603556 0.764382 1.000 1.0000
I.q5502_droughtSeverity_num 1.938677 0.881811 1.000 1.0000
temp 26.059213 0.306374 24.975 25.9250
precip 953.771636 254.254018 599.800 741.2000
wc2.1_30s_elev 68.326229 21.486412 27.000 54.0000
A.q111_fGenderdum 0.029027 0.167894 0.000 0.0000
nitrogen_0.5cm 1.598934 0.214240 1.100 1.5000
sand_0.5cm 29.880990 3.020196 19.000 28.0000
soc_5.15cm 12.701619 2.318527 7.300 11.1000
Median Upper quartile Maximum
L.tonPerHectare 3.00000 3.43000 6.50000
G.q5305_irrigTimes 2.00000 3.00000 5.00000
G.q5305_irrigTimes_cat_One 0.00000 0.00000 1.00000
G.q5305_irrigTimes_cat_Two 1.00000 1.00000 1.00000
G.q5305_irrigTimes_cat_Three 0.00000 1.00000 1.00000
G.q5305_irrigTimes_cat_Fourplus 0.00000 0.00000 1.00000
Sowing_Date_Schedule_T5_16Dec 0.00000 0.00000 1.00000
Sowing_Date_Schedule_T4_15Dec 0.00000 0.00000 1.00000
Sowing_Date_Schedule_T3_30Nov 0.00000 1.00000 1.00000
Sowing_Date_Schedule_T2_20Nov 0.00000 0.00000 1.00000
Sowing_Date_Schedule_T1_10Nov 0.00000 0.00000 1.00000
A.q112_fEdu_new_noSchooling 0.00000 1.00000 1.00000
A.q112_fEdu_new_primary 0.00000 1.00000 1.00000
A.q112_fEdu_new_matriculation 0.00000 0.00000 1.00000
A.q112_fEdu_new_seniorSecondary 0.00000 0.00000 1.00000
A.q112_fEdu_new_bachelors 0.00000 0.00000 1.00000
A.q112_fEdu_new_Postgrad 0.00000 0.00000 1.00000
Nperha 132.54321 156.00000 298.46914
P2O5perha 59.77908 72.69136 212.96296
Weedmanaged 1.00000 1.00000 1.00000
variety_type_NMWV 1.00000 1.00000 1.00000
Sowing_Date_Early 0.00000 1.00000 1.00000
I.q5505_weedSeverity_num 3.00000 3.00000 4.00000
I.q5509_diseaseSeverity_num 1.00000 2.00000 4.00000
I.q5506_insectSeverity_num 1.00000 2.00000 4.00000
I.q5502_droughtSeverity_num 2.00000 3.00000 4.00000
temp 26.05000 26.23333 26.60833
precip 890.89996 1191.60004 1874.40002
wc2.1_30s_elev 67.00000 79.00000 327.00000
A.q111_fGenderdum 0.00000 0.00000 1.00000
nitrogen_0.5cm 1.60000 1.70000 2.70000
sand_0.5cm 30.00000 32.00000 47.00000
soc_5.15cm 12.60000 14.10000 24.30000
table(LDSestim$A.q103_district, LDSestim$Sowing_Date_Schedule)
T5_16Dec T4_15Dec T3_30Nov T2_20Nov T1_10Nov
Arah 37 89 55 22 5
Araria 0 11 59 39 8
Arwal 84 66 15 16 0
Aurangabad 41 112 34 7 0
Balia 0 26 92 83 15
Banka 73 71 23 8 1
Begusarai 0 1 20 90 102
Bhagalpur 41 41 68 46 11
Buxar 24 64 84 31 4
Chandauli 26 141 30 10 1
Deoria 0 8 65 97 39
EastChamparan 22 48 96 38 1
Gaya 25 103 43 20 2
Gazipur 3 38 140 27 2
Gopalganj 6 14 104 75 11
Gorakhpur 0 2 72 122 14
Jehanabad 47 82 23 15 22
Kaimur 55 115 31 3 0
Katihar 7 3 69 31 5
Khagaria 4 15 51 71 64
Kushinagar 7 11 115 58 4
Lakhisarai 37 48 47 42 21
Madhepura 3 15 123 21 7
Maharajganj 2 10 122 68 8
Mau 3 28 116 52 5
Munger 20 61 90 27 12
Muzaffarpur 0 14 119 65 6
Nalanda 9 33 123 26 5
Patna 16 66 87 31 3
Purnia 0 0 5 2 1
Rohtas 38 106 45 5 2
Saharsa 22 70 56 14 1
Samastipur 0 2 119 73 8
Saran 4 21 131 51 2
Sheohar 3 53 104 48 1
Siddharthnagar 0 3 108 67 15
Siwan 0 19 98 81 0
Supaul 3 47 109 44 3
Vaishali 0 0 153 42 1
WestChamparan 3 39 123 36 4
library(modelsummary)
Sowpercent=datasummary_crosstab(A.q103_district ~ Sowing_Date_Schedule, data = LDSestim,output = 'data.frame')
library(reactable)
library(htmltools)
library(fontawesome)
htmltools::browsable(
tagList(
tags$button(
tagList(fontawesome::fa("download"), "Download as CSV"),
onclick = "Reactable.downloadDataCSV('Sowpercent', 'Sowpercent.csv')"
),
reactable(
Sowpercent,
searchable = TRUE,
defaultPageSize = 38,
elementId = "Sowpercent"
)
)
)Before using the causal ML model, we start with the basic OLS in which we control for the conventional crop response function inputs (e.g., fertilizer).
LDSestim$Sowing_Date_Schedule_Unordered <- factor( LDSestim$Sowing_Date_Schedule, ordered = FALSE )
# FGLS
ols = lm(L.tonPerHectare ~Sowing_Date_Schedule_Unordered+Nperha+P2O5perha+variety_type_NMWV+G.q5305_irrigTimes+I.q5505_weedSeverity_num+I.q5509_diseaseSeverity_num+I.q5506_insectSeverity_num+ A.q111_fGenderdum+Weedmanaged+temp+precip+wc2.1_30s_elev+ M.q708_marketDistance+nitrogen_0.5cm+sand_0.5cm+soc_5.15cm+O.largestPlotGPS.Latitude+O.largestPlotGPS.Longitude, data = LDSestim)
summary(ols)
Call:
lm(formula = L.tonPerHectare ~ Sowing_Date_Schedule_Unordered +
Nperha + P2O5perha + variety_type_NMWV + G.q5305_irrigTimes +
I.q5505_weedSeverity_num + I.q5509_diseaseSeverity_num +
I.q5506_insectSeverity_num + A.q111_fGenderdum + Weedmanaged +
temp + precip + wc2.1_30s_elev + M.q708_marketDistance +
nitrogen_0.5cm + sand_0.5cm + soc_5.15cm + O.largestPlotGPS.Latitude +
O.largestPlotGPS.Longitude, data = LDSestim)
Residuals:
Min 1Q Median 3Q Max
-2.0026 -0.4475 -0.0343 0.4066 3.7292
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.335e+00 1.319e+00 5.559 2.80e-08
Sowing_Date_Schedule_UnorderedT4_15Dec 2.624e-01 3.115e-02 8.423 < 2e-16
Sowing_Date_Schedule_UnorderedT3_30Nov 4.893e-01 3.153e-02 15.520 < 2e-16
Sowing_Date_Schedule_UnorderedT2_20Nov 6.948e-01 3.433e-02 20.242 < 2e-16
Sowing_Date_Schedule_UnorderedT1_10Nov 8.701e-01 4.524e-02 19.234 < 2e-16
Nperha 1.166e-03 2.482e-04 4.696 2.70e-06
P2O5perha 2.550e-03 4.474e-04 5.699 1.25e-08
variety_type_NMWV 3.156e-01 1.827e-02 17.268 < 2e-16
G.q5305_irrigTimes 3.493e-01 1.109e-02 31.483 < 2e-16
I.q5505_weedSeverity_num -7.478e-02 1.044e-02 -7.164 8.56e-13
I.q5509_diseaseSeverity_num -1.494e-02 1.339e-02 -1.116 0.26440
I.q5506_insectSeverity_num 2.505e-02 1.225e-02 2.045 0.04094
A.q111_fGenderdum -8.380e-02 4.564e-02 -1.836 0.06639
Weedmanaged 2.424e-01 1.930e-02 12.556 < 2e-16
temp 4.744e-03 2.572e-02 0.184 0.85363
precip -4.662e-05 3.178e-05 -1.467 0.14248
wc2.1_30s_elev -2.241e-03 4.688e-04 -4.780 1.79e-06
M.q708_marketDistance -2.817e-03 1.967e-03 -1.432 0.15216
nitrogen_0.5cm 1.420e-01 5.054e-02 2.809 0.00499
sand_0.5cm 1.891e-03 3.390e-03 0.558 0.57692
soc_5.15cm 1.559e-02 4.833e-03 3.226 0.00126
O.largestPlotGPS.Latitude 4.103e-02 1.912e-02 2.146 0.03194
O.largestPlotGPS.Longitude -8.853e-02 1.019e-02 -8.690 < 2e-16
(Intercept) ***
Sowing_Date_Schedule_UnorderedT4_15Dec ***
Sowing_Date_Schedule_UnorderedT3_30Nov ***
Sowing_Date_Schedule_UnorderedT2_20Nov ***
Sowing_Date_Schedule_UnorderedT1_10Nov ***
Nperha ***
P2O5perha ***
variety_type_NMWV ***
G.q5305_irrigTimes ***
I.q5505_weedSeverity_num ***
I.q5509_diseaseSeverity_num
I.q5506_insectSeverity_num *
A.q111_fGenderdum .
Weedmanaged ***
temp
precip
wc2.1_30s_elev ***
M.q708_marketDistance
nitrogen_0.5cm **
sand_0.5cm
soc_5.15cm **
O.largestPlotGPS.Latitude *
O.largestPlotGPS.Longitude ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.6652 on 7539 degrees of freedom
(86 observations deleted due to missingness)
Multiple R-squared: 0.3888, Adjusted R-squared: 0.387
F-statistic: 217.9 on 22 and 7539 DF, p-value: < 2.2e-16
library(stargazer)
stargazer(ols,
type="text",
keep.stat=c("n","rsq"))
==================================================================
Dependent variable:
---------------------------
L.tonPerHectare
------------------------------------------------------------------
Sowing_Date_Schedule_UnorderedT4_15Dec 0.262***
(0.031)
Sowing_Date_Schedule_UnorderedT3_30Nov 0.489***
(0.032)
Sowing_Date_Schedule_UnorderedT2_20Nov 0.695***
(0.034)
Sowing_Date_Schedule_UnorderedT1_10Nov 0.870***
(0.045)
Nperha 0.001***
(0.0002)
P2O5perha 0.003***
(0.0004)
variety_type_NMWV 0.316***
(0.018)
G.q5305_irrigTimes 0.349***
(0.011)
I.q5505_weedSeverity_num -0.075***
(0.010)
I.q5509_diseaseSeverity_num -0.015
(0.013)
I.q5506_insectSeverity_num 0.025**
(0.012)
A.q111_fGenderdum -0.084*
(0.046)
Weedmanaged 0.242***
(0.019)
temp 0.005
(0.026)
precip -0.00005
(0.00003)
wc2.1_30s_elev -0.002***
(0.0005)
M.q708_marketDistance -0.003
(0.002)
nitrogen_0.5cm 0.142***
(0.051)
sand_0.5cm 0.002
(0.003)
soc_5.15cm 0.016***
(0.005)
O.largestPlotGPS.Latitude 0.041**
(0.019)
O.largestPlotGPS.Longitude -0.089***
(0.010)
Constant 7.335***
(1.319)
------------------------------------------------------------------
Observations 7,562
R2 0.389
==================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
library(modelsummary)
b <- list(geom_vline(xintercept = 0, color = 'orange'))
modelplot(ols,background = b,coef_omit = "Interc")anova(ols)Analysis of Variance Table
Response: L.tonPerHectare
Df Sum Sq Mean Sq F value Pr(>F)
Sowing_Date_Schedule_Unordered 4 936.2 234.06 529.0212 < 2.2e-16 ***
Nperha 1 232.9 232.92 526.4608 < 2.2e-16 ***
P2O5perha 1 27.0 27.04 61.1079 6.136e-15 ***
variety_type_NMWV 1 278.5 278.49 629.4484 < 2.2e-16 ***
G.q5305_irrigTimes 1 481.4 481.39 1088.0474 < 2.2e-16 ***
I.q5505_weedSeverity_num 1 17.4 17.38 39.2786 3.877e-10 ***
I.q5509_diseaseSeverity_num 1 1.4 1.43 3.2348 0.07213 .
I.q5506_insectSeverity_num 1 0.6 0.58 1.3158 0.25138
A.q111_fGenderdum 1 2.0 2.00 4.5245 0.03345 *
Weedmanaged 1 81.1 81.05 183.1994 < 2.2e-16 ***
temp 1 0.3 0.32 0.7331 0.39192
precip 1 2.8 2.78 6.2890 0.01217 *
wc2.1_30s_elev 1 0.0 0.00 0.0025 0.96004
M.q708_marketDistance 1 2.0 1.98 4.4796 0.03434 *
nitrogen_0.5cm 1 0.9 0.85 1.9274 0.16509
sand_0.5cm 1 0.0 0.00 0.0093 0.92334
soc_5.15cm 1 1.3 1.28 2.8857 0.08941 .
O.largestPlotGPS.Latitude 1 22.3 22.26 50.3070 1.434e-12 ***
O.largestPlotGPS.Longitude 1 33.4 33.41 75.5172 < 2.2e-16 ***
Residuals 7539 3335.5 0.44
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Shapley value regression -----
# library(ShapleyValue)
#
# y <- LDSestim$L.tonPerHectare
# x=subset(LDSestim, select=c("I.q5505_weedSeverity_num","I.q5509_diseaseSeverity_num","I.q5506_insectSeverity_num",
# "Nperha","P2O5perha","variety_type_NMWV","G.q5305_irrigTimes","A.q111_fGenderdum","Weedmanaged","temp","precip","wc2.1_30s_elev",
# "M.q708_marketDistance","nitrogen_0.5cm","sand_0.5cm", "soc_5.15cm","O.largestPlotGPS.Latitude","O.largestPlotGPS.Longitude"))
#
# # Note: This takes a lot of time!
# value <- shapleyvalue(y,x)
#
# library(kableExtra)
# value %>%
# kbl() %>%
# kable_classic(full_width = F, html_font = "Cambria")
#
#
# shapleyvaluet=as.data.frame(t(value))
# shapleyvaluet=cbind(rownames(shapleyvaluet), data.frame(shapleyvaluet, row.names=NULL))
# names(shapleyvaluet)[1]="vars"
#
# library(ggplot2)
# shapleyvalueplot=ggplot(shapleyvaluet,aes(x=reorder(vars,Standardized.Shapley.Value),y=Standardized.Shapley.Value))+
# geom_jitter(color="steelblue")+
# coord_flip()+
# labs(x="Variables",y="Standardized.Shapley.Value")
# previous_theme <- theme_set(theme_bw())
# shapleyvalueplotlibrary(grf)
library(policytree)
LDSestim_sow=subset(LDSestim, select=c("Sowing_Date_Schedule","L.tonPerHectare","I.q5505_weedSeverity_num","I.q5509_diseaseSeverity_num","I.q5506_insectSeverity_num","I.q5502_droughtSeverity_num", "Nperha","P2O5perha","variety_type_NMWV","G.q5305_irrigTimes","A.q111_fGenderdum","Weedmanaged","temp","precip","wc2.1_30s_elev","M.q708_marketDistance","nitrogen_0.5cm","sand_0.5cm", "soc_5.15cm","O.largestPlotGPS.Latitude","O.largestPlotGPS.Longitude"))
library(tidyr)
LDSestim_sow=LDSestim_sow %>% drop_na()
Y_cf_sowing=as.vector(LDSestim_sow$L.tonPerHectare)
## Causal random forest -----------------
X_cf_sowing=subset(LDSestim_sow, select=c("I.q5505_weedSeverity_num","I.q5509_diseaseSeverity_num","I.q5506_insectSeverity_num",
"Nperha","P2O5perha","variety_type_NMWV","G.q5305_irrigTimes","A.q111_fGenderdum","Weedmanaged","temp","precip","wc2.1_30s_elev",
"M.q708_marketDistance","nitrogen_0.5cm","sand_0.5cm", "soc_5.15cm","O.largestPlotGPS.Latitude","O.largestPlotGPS.Longitude"))
W_cf_sowing <- as.factor(LDSestim_sow$Sowing_Date_Schedule)
W.multi_sowing.forest <- probability_forest(X_cf_sowing, W_cf_sowing,
equalize.cluster.weights = FALSE,
seed = 2
)
W.hat.multi.all_sowing <- predict(W.multi_sowing.forest, estimate.variance = TRUE)$predictions
Y.multi_sowing.forest <- regression_forest(X_cf_sowing, Y_cf_sowing,
equalize.cluster.weights = FALSE,
seed = 2
)
print(Y.multi_sowing.forest)GRF forest object of type regression_forest
Number of trees: 2000
Number of training samples: 7562
Variable importance:
1 2 3 4 5 6 7 8 9 10 11 12 13
0.000 0.000 0.001 0.021 0.015 0.651 0.193 0.000 0.064 0.001 0.001 0.006 0.001
14 15 16 17 18
0.001 0.004 0.002 0.023 0.015
varimp.multi_sowing <- variable_importance(Y.multi_sowing.forest)
Y.hat.multi.all_sowing <- predict(Y.multi_sowing.forest, estimate.variance = TRUE)$predictions
multi_sowing.forest <- multi_arm_causal_forest(X = X_cf_sowing, Y = Y_cf_sowing, W = W_cf_sowing ,W.hat=W.hat.multi.all_sowing,Y.hat=Y.hat.multi.all_sowing,seed=2)
varimp.multi_sowing_cf <- variable_importance(multi_sowing.forest)
multi_sowing_ate=average_treatment_effect(multi_sowing.forest, method="AIPW")
multi_sowing_ate estimate std.err contrast outcome
T4_15Dec - T5_16Dec 0.2358984 0.02577961 T4_15Dec - T5_16Dec Y.1
T3_30Nov - T5_16Dec 0.4245938 0.02284938 T3_30Nov - T5_16Dec Y.1
T2_20Nov - T5_16Dec 0.5638900 0.02572197 T2_20Nov - T5_16Dec Y.1
T1_10Nov - T5_16Dec 0.6981874 0.03905789 T1_10Nov - T5_16Dec Y.1
varimp.multi_sowing_cf <- variable_importance(multi_sowing.forest)
vars_sowing=c("I.q5505_weedSeverity_num","I.q5509_diseaseSeverity_num","I.q5506_insectSeverity_num",
"Nperha","P2O5perha","variety_type_NMWV","G.q5305_irrigTimes","A.q111_fGenderdum","Weedmanaged","temp","precip","wc2.1_30s_elev",
"M.q708_marketDistance","nitrogen_0.5cm","sand_0.5cm", "soc_5.15cm","O.largestPlotGPS.Latitude","O.largestPlotGPS.Longitude")
## variable importance plot ----------------------------------------------------
varimpvars_sowing=as.data.frame(cbind(varimp.multi_sowing_cf,vars_sowing))
names(varimpvars_sowing)[1]="Variableimportance_sowing"
varimpvars_sowing$Variableimportance_sowing=formatC(varimpvars_sowing$Variableimportance_sowing, digits = 2, format = "f")
varimpvars_sowing$Variableimportance_sowing=as.numeric(varimpvars_sowing$Variableimportance_sowing)
varimpplotRF_sowing=ggplot(varimpvars_sowing,aes(x=reorder(vars_sowing,Variableimportance_sowing),y=Variableimportance_sowing))+
geom_jitter(color="steelblue")+
coord_flip()+
labs(x="Variables",y="Variable importance")
previous_theme <- theme_set(theme_bw(base_size = 16))
varimpplotRF_sowing# Policy tree --------------------------------------
DR.scores_sowing <- double_robust_scores(multi_sowing.forest)
tr_sowing <- policy_tree(X_cf_sowing, DR.scores_sowing, depth = 2)
plot(tr_sowing)tr_sowing3 <- hybrid_policy_tree(X_cf_sowing, DR.scores_sowing, depth = 3)
tr_sowing3policy_tree object
Tree depth: 3
Actions: 1: T5_16Dec 2: T4_15Dec 3: T3_30Nov 4: T2_20Nov 5: T1_10Nov
Variable splits:
(1) split_variable: O.largestPlotGPS.Latitude split_value: 25.84
(2) split_variable: P2O5perha split_value: 62.4691
(4) split_variable: O.largestPlotGPS.Latitude split_value: 25.42
(8) * action: 5
(9) * action: 4
(5) split_variable: soc_5.15cm split_value: 9.1
(10) * action: 4
(11) * action: 5
(3) split_variable: O.largestPlotGPS.Longitude split_value: 83.47
(6) split_variable: precip split_value: 710.4
(12) * action: 5
(13) * action: 4
(7) split_variable: temp split_value: 25.6
(14) * action: 2
(15) * action: 5
plot(tr_sowing3)tr_sowing4 <- hybrid_policy_tree(X_cf_sowing, DR.scores_sowing, depth = 4)
tr_sowing4policy_tree object
Tree depth: 4
Actions: 1: T5_16Dec 2: T4_15Dec 3: T3_30Nov 4: T2_20Nov 5: T1_10Nov
Variable splits:
(1) split_variable: O.largestPlotGPS.Latitude split_value: 25.84
(2) split_variable: P2O5perha split_value: 62.4691
(4) split_variable: soc_5.15cm split_value: 16.2
(8) split_variable: O.largestPlotGPS.Latitude split_value: 25.42
(16) * action: 5
(17) * action: 4
(9) split_variable: I.q5509_diseaseSeverity_num split_value: 2
(18) * action: 4
(19) * action: 5
(5) split_variable: I.q5505_weedSeverity_num split_value: 3
(10) split_variable: soc_5.15cm split_value: 9.1
(20) * action: 4
(21) * action: 5
(11) split_variable: precip split_value: 785.4
(22) * action: 4
(23) * action: 5
(3) split_variable: O.largestPlotGPS.Longitude split_value: 83.47
(6) split_variable: P2O5perha split_value: 56.7901
(12) split_variable: precip split_value: 710.4
(24) * action: 5
(25) * action: 4
(13) split_variable: temp split_value: 26.05
(26) * action: 5
(27) * action: 4
(7) split_variable: P2O5perha split_value: 74.963
(14) split_variable: temp split_value: 25.6
(28) * action: 2
(29) * action: 5
(15) split_variable: temp split_value: 26.1333
(30) * action: 5
(31) * action: 4
plot(tr_sowing4)tr_assignment_sowing=LDSestim_sow
tr_assignment_sowing$depth2 <- predict(tr_sowing, X_cf_sowing)
table(tr_assignment_sowing$depth2)
4 5
2031 5531
tr_assignment_sowing$depth3 <- predict(tr_sowing3, X_cf_sowing)
table(tr_assignment_sowing$depth3)
2 4 5
183 1252 6127
tr_assignment_sowing$depth4 <- predict(tr_sowing4, X_cf_sowing)
table(tr_assignment_sowing$depth4)
2 4 5
159 1565 5838
library(rgdal)
tr_assignment_sowing$depth2_cat[tr_assignment_sowing$depth2==1]="T5_16Dec"
tr_assignment_sowing$depth2_cat[tr_assignment_sowing$depth2==2]="T4_15Dec"
tr_assignment_sowing$depth2_cat[tr_assignment_sowing$depth2==3]="T3_30Nov"
tr_assignment_sowing$depth2_cat[tr_assignment_sowing$depth2==4]="T2_20Nov"
tr_assignment_sowing$depth2_cat[tr_assignment_sowing$depth2==5]="T1_10Nov"
tr_assignment_sowingsp= SpatialPointsDataFrame(cbind(tr_assignment_sowing$O.largestPlotGPS.Longitude,tr_assignment_sowing$O.largestPlotGPS.Latitude),data=tr_assignment_sowing,proj4string=CRS("+proj=longlat +datum=WGS84"))
library(mapview)
mapviewOptions(fgb = FALSE)
tr_assignment_sowingspmapview=mapview(tr_assignment_sowingsp,zcol="depth2_cat",layer.name="Recommended sowing dates")
tr_assignment_sowingspmapview